



/********************************************************************
* IPW / IPWRA DIAGNOSTICS
* - Selection (attrition) weights for samples S ∈ {sample1_ext, sample2,
*   sample3_ext, sample5_ext}:  S ~ D + xvars + bels + strat
* - Treatment diagnostics for D using the SAME treatment model as in
*   the paper: (D `vars_sample3' `strat') with
*       local vars_sample3 `xvars' `bel2'
*   and selection weights w_sel_S as pweights.
********************************************************************/


********************************************************************
* FINAL CONTROL SETS (CONSISTENT WITH MAIN ANALYSIS)
********************************************************************

* Belief blocks
local bel1 fob1 sob1 sob1sdb sob1m 
local bel2 fob2 sob2 sob2m sob2p
local bel3 fob3 sob3 sob3sdb sob3m sob3p
local bel4 fob4 fob6
local bel5 fob5 sob5 sob5m sob5p
local bel6 fob7 sob7 sob7m sob7p
local bel7 fob8 sob8 sob8m sob8p
local bel8 e501 e502 e503 e501b e504 e505 e506 e504b
local bel9 fob9

* Beliefs used in selection models (same as Table 5 / attrition code)
local bels `bel1' `bel2' `bel3' `bel4' `bel6' `bel7' `bel8' `bel9'

* Baseline covariates (non-beliefs) – EXACTLY as in your final code
local xvars female c1 c2_2_rp c3 c5 f6 age_dif  ///
             level_1 level_2 edu_WmoreH inactiveM unemployedM estrato_12 estrato_3 tec_2 

* Strata FE
local strat inactiveF employedF fob2M

* Treatment-model covariate set used in ALL teffects ipwra in the paper
* (see Table 6 do-file): teffects ipwra (...)(D `vars_sample3' `strat')
local vars_sample3 `xvars' `bel2'

* Samples used in Tables 6–9 and attrition (final)
local samples sample1_ext sample2 sample3_ext sample5_ext

* Sample labels
local lab_sample1_ext "Midline Survey"
local lab_sample2     "Endline Survey"
local lab_sample3_ext "Both"
local lab_sample5_ext "Endline Only"

* Helper: safe min/max
cap program drop _safeminmax
program define _safeminmax, rclass
    syntax varname [if]
    quietly summarize `varlist' `if', meanonly
    return scalar min = r(min)
    return scalar max = r(max)
end


********************************************************************
* 0. BUILD SELECTION (ATTRITION) WEIGHTS FOR EACH SAMPLE S
*    S ~ D + xvars + bels + strat   (probit)
*    w_sel_S = 1 / Pr(S=1 | D,X)
********************************************************************

local sel_rhs D `xvars' `bels' `strat'

foreach S of local samples {
    cap confirm variable `S'
    if _rc {
        di as txt ">> Sample variable `S' not found. Skipping selection weights."
        continue
    }

    di as txt _n ">>> Building selection weights for `S'"

    cap drop ps_sel_`S' w_sel_`S'
    quietly probit `S' `sel_rhs', vce(cluster id)
    predict double ps_sel_`S', pr

    gen double w_sel_`S' = .
    replace w_sel_`S' = 1/ps_sel_`S' if `S'==1
    label var w_sel_`S' "Selection IPW: 1/Pr(`S'=1 | D,X)"
}




********************************************************************
* 1B. SELECTIVE EXPOSURE DIAGNOSTICS (D) — POOLED SAMPLE
*     (men + women together, same model and weights)
********************************************************************

cap log close
log using "SelectiveExposure_Diagnostics_pooled.log", text replace 

di as txt "==================================================================="
di as txt "SELECTIVE EXPOSURE DIAGNOSTICS (D) — POOLED SAMPLE"
di as txt "Date/time: " c(current_date) "  " c(current_time)
di as txt "===================================================================" _n

foreach S of local samples {
    cap confirm variable `S'
    if _rc continue

    cap confirm variable w_sel_`S'
    if _rc {
        di as txt ">> Selection weights w_sel_`S' not found. Skipping pooled diagnostics for `S'."
        continue
    }

    * Human-readable label
    local slab "`S'"
    capture confirm local lab_`S'
    if !_rc local slab "`lab_`S''"

    di as txt _n "-------------------------------------------------------------------"
    di as txt "`S' — `slab'  |  POOLED (men + women)"
    di as txt "D-model RHS (pooled): `vars_sample3' `strat'"
    di as txt "Using selection weights: w_sel_`S'"

    * Fake outcome to feed teffects/tebalance (same idea as before)
    cap drop y0_bal_p
    cap confirm variable c1
    if _rc gen double c1 = .
    quietly summarize c1 if `S'==1, meanonly
    if r(N)==0 {
        set seed 12345
        gen double y0_bal_p = runiform() if `S'==1
    }
    else {
        gen double y0_bal_p = c1 if `S'==1
        replace y0_bal_p = 0 if `S'==1 & missing(y0_bal_p)
    }

    * teffects ipwra for D, pooled sample
    cap confirm variable id
    if _rc teffects ipwra (y0_bal_p `strat') (D `vars_sample3' `strat') ///
            if `S'==1 [pweight = w_sel_`S'], atet vce(robust)
    else    teffects ipwra (y0_bal_p `strat') (D `vars_sample3' `strat') ///
            if `S'==1 [pweight = w_sel_`S'], atet vce(cluster id)

    * (1) Post-weighting balance across D (SMDs)
    di as txt "tebalance summarize (SMDs across D=1 vs D=0):"
    tebalance summarize

    * (2) Overidentification J-test
    capture noisily tebalance overid
    if _rc==0 {
        scalar Jstat_p = r(chi2)
        scalar Jdf_p   = r(df)
        scalar Jp_p    = r(p)
        di as res "Overidentification (pooled): J = " %6.2f Jstat_p ///
                  "  df = " %2.0f Jdf_p "  p = " %6.3f Jp_p
    }
    else di as txt "Overidentification: not available for this pooled subset."

    * (3) Overlap + ESS for D-weights (same formulas as before)
    tempvar psD_p wstab_p w2_p outcs_p w_out_p

    predict double `psD_p' if e(sample), ps
    quietly summarize D if e(sample), meanonly
    scalar pt_p = r(mean)
    scalar pc_p = 1-pt_p

    gen double `wstab_p' = .                               if e(sample)
    replace `wstab_p' = 1                                  if e(sample) & D==1
    replace `wstab_p' = (pt_p/pc_p) * (`psD_p'/(1-`psD_p')) if e(sample) & D==0
    replace `wstab_p' = min(`wstab_p', 1e6)                if e(sample)

    * Common support window
    qui _safeminmax `psD_p' if e(sample) & D==1
    scalar minps_t_p = r(min)
    scalar maxps_t_p = r(max)
    qui _safeminmax `psD_p' if e(sample) & D==0
    scalar minps_c_p = r(min)
    scalar maxps_c_p = r(max)
    scalar LCS_p = max(minps_t_p, minps_c_p)
    scalar UCS_p = min(maxps_t_p, maxps_c_p)

    gen byte   `outcs_p' = (`psD_p' < LCS_p | `psD_p' > UCS_p) if e(sample)
    gen double `w_out_p' = `wstab_p'*`outcs_p'                 if e(sample)
    quietly summarize `wstab_p' if e(sample), meanonly
    scalar sw_all_p = r(sum)
    quietly summarize `w_out_p', meanonly
    scalar share_out_w_p = r(sum)/sw_all_p

    di as res "  Weighted mass outside common support (pooled) = " ///
        %6.3f share_out_w_p

    * Effective Sample Size (ESS)
    gen double `w2_p' = `wstab_p'^2 if e(sample)
    quietly summarize `wstab_p' if e(sample), meanonly
    scalar sw_p  = r(sum)
    quietly summarize `w2_p' if e(sample), meanonly
    scalar sw2_p = r(sum)
    scalar ess_all_p = (sw_p^2)/sw2_p

    di as res "  ESS(All, pooled) = " %9.1f ess_all_p

    di as txt ">>> For the table, record:"
    di as txt "    - Max |SMD| raw and weighted from tebalance summarize"
    di as txt "    - J-test p-value = " %6.3f Jp_p
    di as txt "    - ESS(All, pooled) = " %9.1f ess_all_p
    di as txt "    - Mass outside CS (%) = " %6.2f (100*share_out_w_p)
}

log close

stop 

********************************************************************
* 1. SELECTIVE EXPOSURE DIAGNOSTICS (D) VIA IPWRA + tebalance
*    - For each sample S and gender g.
*    - Treatment-model for D: EXACTLY as in the paper,
*         (D `vars_sample3' `strat')
*      with 'female' dropped when splitting by gender.
*    - Selection weights w_sel_S used as pweights.
********************************************************************

cap log close
log using "SelectiveExposure_Diagnostics.log", text replace 

di as txt "==================================================================="
di as txt "SELECTIVE EXPOSURE DIAGNOSTICS (D) — IPWRA + tebalance"
di as txt "Date/time: " c(current_date) "  " c(current_time)
di as txt "===================================================================" _n

* Panel collectors for PS densities across samples
local P_men
local P_women
*** NEW: pooled (men + women together)
local P_all

foreach S of local samples {
    cap confirm variable `S'
    if _rc continue

    cap confirm variable w_sel_`S'
    if _rc {
        di as txt ">> Selection weights w_sel_`S' not found. Skipping D-diagnostics for `S'."
        continue
    }

    * Get human-readable label for this sample
    local slab "`S'"
    capture confirm local lab_`S'
    if !_rc local slab "`lab_`S''"  

    * RHS for D-model: SAME as in teffects ipwra in the paper
    * teffects ipwra (Y ...) (D `vars_sample3' `strat')
    local rhs_all `vars_sample3' `strat'

    * Drop 'female' for gender-split models
    local rhs
    foreach tok of local rhs_all {
        if "`tok'"!="female" local rhs `rhs' `tok'
    }

    *------------------------------*
    * BY GENDER (existing code)
    *------------------------------*
    foreach g in 0 1 {
        count if `S'==1 & female==`g'
        if r(N)==0 {
            di as txt ">> No obs in `S' × sex=`g'. Skipping."
            continue
        }
        if `g'==0 local sexlbl Men
        else      local sexlbl Women

        di as txt _n "-------------------------------------------------------------------"
        di as txt "`S' — `slab'  |  Sex: `sexlbl'"
        di as txt "D-model RHS: `rhs'"
        di as txt "Using selection weights: w_sel_`S'"

        * Fake outcome to feed teffects/tebalance
        cap drop y0_bal
        cap confirm variable c1
        if _rc gen double c1 = .
        quietly summarize c1 if `S'==1, meanonly
        if r(N)==0 {
            set seed 12345
            gen double y0_bal = runiform() if `S'==1
        }
        else {
            gen double y0_bal = c1 if `S'==1
            replace y0_bal = 0 if `S'==1 & missing(y0_bal)
        }

        * teffects ipwra for D with selection weights as pweights
        cap confirm variable id
        if _rc teffects ipwra (y0_bal `strat') (D `rhs') ///
                if `S'==1 & female==`g' [pweight = w_sel_`S'], atet vce(robust)
        else    teffects ipwra (y0_bal `strat') (D `rhs') ///
                if `S'==1 & female==`g' [pweight = w_sel_`S'], atet vce(cluster id)

        * (1) Post-weighting balance across D (standardized differences)
        di as txt "tebalance summarize (SMDs across D=1 vs D=0):"
        tebalance summarize

        * (2) Overidentification J-test
        capture noisily tebalance overid
        if _rc==0 {
            scalar Jstat = r(chi2)
            scalar Jdf   = r(df)
            scalar Jp    = r(p)
            di as res "Overidentification: J = " %6.2f Jstat "  df = " %2.0f Jdf "  p = " %6.3f Jp
        }
        else di as txt "Overidentification: not available for this subset."

        * (3) PS and ATT weights for D (for overlap and ESS)
        tempvar psD wstab w2 outcs w_out x0r d0r x1r d1r x0w d0w x1w d1w

        predict double `psD' if e(sample), ps
        quietly summarize D if e(sample), meanonly
        scalar pt = r(mean)
        scalar pc = 1-pt

        gen double `wstab' = .                            if e(sample)
        replace `wstab' = 1                               if e(sample) & D==1
        replace `wstab' = (pt/pc) * (`psD'/(1-`psD'))     if e(sample) & D==0
        replace `wstab' = min(`wstab', 1e6)               if e(sample)

        * kdensity: raw vs weighted PS by D
        qui kdensity `psD' if e(sample) & D==0, n(200) gen(`x0r' `d0r')
        qui su `d0r', meanonly
        scalar y0r = r(max)

        qui kdensity `psD' if e(sample) & D==1, n(200) gen(`x1r' `d1r')
        qui su `d1r', meanonly
        scalar y1r = r(max)

        qui kdensity `psD' [aw=`wstab'] if e(sample) & D==0, n(200) gen(`x0w' `d0w')
        qui su `d0w', meanonly
        scalar y0w = r(max)

        qui kdensity `psD' [aw=`wstab'] if e(sample) & D==1, n(200) gen(`x1w' `d1w')
        qui su `d1w', meanonly
        scalar y1w = r(max)

        scalar ytop = max(y0r,y1r,y0w,y1w)*1.05
        local ytop = `=ytop'
        if `ytop'<=0 local ytop = 1

        * RAW PS densities (left panel)
        twoway ///
            (kdensity `psD' if e(sample) & D==0, n(200) lpattern(solid) lwidth(medthick)) ///
            (kdensity `psD' if e(sample) & D==1, n(200) lpattern(dash)  lwidth(medthick)), ///
            scheme(s2mono) ///
            title("`lab_`S''", size(small)) ///
            legend(order(1 "D=0" 2 "D=1") cols(2) size(small) region(lcolor(none))) ///
            xtitle("") ytitle("") ///
            xlabel(0(.2)1, labsize(small)) ///
            ylabel(, labsize(small) angle(0)) ///
            xscale(range(0 1)) yscale(range(0 `ytop')) ///
            plotregion(lcolor(none)) graphregion(color(white)) ///
            name(g_raw_`S'_sex`g', replace)

        * WEIGHTED PS densities (right panel)
        twoway ///
            (kdensity `psD' if e(sample) & D==0 [aw=`wstab'], n(200) lpattern(solid) lwidth(medthick)) ///
            (kdensity `psD' if e(sample) & D==1 [aw=`wstab'], n(200) lpattern(dash)  lwidth(medthick)), ///
            scheme(s2mono) ///
            title("`lab_`S''", size(small)) ///
            legend(off) ///
            xtitle("") ytitle("") ///
            xlabel(0(.2)1, labsize(small)) ///
            ylabel(, labsize(small) angle(0)) ///
            xscale(range(0 1)) yscale(range(0 `ytop')) ///
            plotregion(lcolor(none)) graphregion(color(white)) ///
            name(g_wgt_`S'_sex`g', replace)

        * Collect panel names
        if `g'==0 local P_men   `P_men'   g_raw_`S'_sex`g' g_wgt_`S'_sex`g'
        if `g'==1 local P_women `P_women' g_raw_`S'_sex`g' g_wgt_`S'_sex`g'

        * Overlap window + mass outside support (for D-weights)
        qui _safeminmax `psD' if e(sample) & D==1
        scalar minps_t = r(min)
        scalar maxps_t = r(max)
        qui _safeminmax `psD' if e(sample) & D==0
        scalar minps_c = r(min)
        scalar maxps_c = r(max)
        scalar LCS = max(minps_t, minps_c)
        scalar UCS = min(maxps_t, maxps_c)

        gen byte   `outcs' = (`psD' < LCS | `psD' > UCS) if e(sample)
        gen double `w_out' = `wstab'*`outcs'             if e(sample)
        quietly summarize `wstab' if e(sample), meanonly
        scalar sw_all = r(sum)
        quietly summarize `w_out', meanonly
        scalar share_out_w = r(sum)/sw_all

        di as txt "Propensity overlap for D (after selection weighting):"
        di as res "  LCS/UCS = " %6.3f LCS " / " %6.3f UCS ///
                  "   Treated min/max = " %6.3f minps_t " / " %6.3f maxps_t ///
                  "   Control min/max = " %6.3f minps_c " / " %6.3f maxps_c
        di as res "  Weighted mass outside common support = " %6.3f share_out_w

        * ESS for D-weights
        gen double `w2' = `wstab'^2 if e(sample)
        quietly summarize `wstab' if e(sample), meanonly
        scalar sw  = r(sum)
        quietly summarize `w2' if e(sample), meanonly
        scalar sw2 = r(sum)
        scalar ess_all = (sw^2)/sw2

        quietly summarize `wstab' if e(sample) & D==1, meanonly
        scalar sw_t  = r(sum)
        quietly summarize `w2' if e(sample) & D==1, meanonly
        scalar sw2_t = r(sum)
        scalar ess_t = (sw_t^2)/sw2_t

        quietly summarize `wstab' if e(sample) & D==0, meanonly
        scalar sw_c  = r(sum)
        quietly summarize `w2' if e(sample) & D==0, meanonly
        scalar sw2_c = r(sum)
        scalar ess_c = (sw_c^2)/sw2_c

        di as txt "Effective Sample Size for D-weights (stabilized ATT):"
        di as res "  ESS(All) = " %9.1f ess_all "   ESS(D=1) = " %9.1f ess_t "   ESS(D=0) = " %9.1f ess_c
    }

    *--------------------------------------------------------------*
    * NEW: POOLED SELECTIVE EXPOSURE (MEN + WOMEN TOGETHER)
    *--------------------------------------------------------------*
    count if `S'==1
    if r(N)>0 {
        di as txt _n "-------------------------------------------------------------------"
        di as txt "`S' — `slab'  |  Pooled (men and women)"
        di as txt "D-model RHS (pooled): `rhs_all'"
        di as txt "Using selection weights: w_sel_`S'"

        cap drop y0_bal
        cap confirm variable c1
        if _rc gen double c1 = .
        quietly summarize c1 if `S'==1, meanonly
        if r(N)==0 {
            set seed 12345
            gen double y0_bal = runiform() if `S'==1
        }
        else {
            gen double y0_bal = c1 if `S'==1
            replace y0_bal = 0 if `S'==1 & missing(y0_bal)
        }

        cap confirm variable id
        if _rc teffects ipwra (y0_bal `strat') (D `rhs_all') ///
                if `S'==1 [pweight = w_sel_`S'], atet vce(robust)
        else    teffects ipwra (y0_bal `strat') (D `rhs_all') ///
                if `S'==1 [pweight = w_sel_`S'], atet vce(cluster id)

        di as txt "tebalance summarize (SMDs across D=1 vs D=0), pooled:"
        tebalance summarize

        capture noisily tebalance overid
        if _rc==0 {
            scalar Jstat_p = r(chi2)
            scalar Jdf_p   = r(df)
            scalar Jp_p    = r(p)
            di as res "Overidentification (pooled): J = " %6.2f Jstat_p "  df = " %2.0f Jdf_p "  p = " %6.3f Jp_p
        }
        else di as txt "Overidentification: not available for pooled subset."

        tempvar psD_all wstab_all w2_all outcs_all w_out_all x0r_all d0r_all x1r_all d1r_all x0w_all d0w_all x1w_all d1w_all

        predict double `psD_all' if e(sample), ps
        quietly summarize D if e(sample), meanonly
        scalar pt_all = r(mean)
        scalar pc_all = 1-pt_all

        gen double `wstab_all' = .                                 if e(sample)
        replace `wstab_all' = 1                                   if e(sample) & D==1
        replace `wstab_all' = (pt_all/pc_all) * (`psD_all'/(1-`psD_all')) ///
            if e(sample) & D==0
        replace `wstab_all' = min(`wstab_all', 1e6)               if e(sample)

        qui kdensity `psD_all' if e(sample) & D==0, n(200) gen(`x0r_all' `d0r_all')
        qui su `d0r_all', meanonly
        scalar y0r_all = r(max)

        qui kdensity `psD_all' if e(sample) & D==1, n(200) gen(`x1r_all' `d1r_all')
        qui su `d1r_all', meanonly
        scalar y1r_all = r(max)

        qui kdensity `psD_all' [aw=`wstab_all'] if e(sample) & D==0, n(200) gen(`x0w_all' `d0w_all')
        qui su `d0w_all', meanonly
        scalar y0w_all = r(max)

        qui kdensity `psD_all' [aw=`wstab_all'] if e(sample) & D==1, n(200) gen(`x1w_all' `d1w_all')
        qui su `d1w_all', meanonly
        scalar y1w_all = r(max)

        scalar ytop_all = max(y0r_all,y1r_all,y0w_all,y1w_all)*1.05
        local ytop_all = `=ytop_all'
        if `ytop_all'<=0 local ytop_all = 1

        twoway ///
            (kdensity `psD_all' if e(sample) & D==0, n(200) lpattern(solid) lwidth(medthick)) ///
            (kdensity `psD_all' if e(sample) & D==1, n(200) lpattern(dash)  lwidth(medthick)), ///
            scheme(s2mono) ///
            title("`lab_`S''", size(small)) ///
            legend(order(1 "D=0" 2 "D=1") cols(2) size(small) region(lcolor(none))) ///
            xtitle("") ytitle("") ///
            xlabel(0(.2)1, labsize(small)) ///
            ylabel(, labsize(small) angle(0)) ///
            xscale(range(0 1)) yscale(range(0 `ytop_all')) ///
            plotregion(lcolor(none)) graphregion(color(white)) ///
            name(g_raw_`S'_all, replace)

        twoway ///
            (kdensity `psD_all' if e(sample) & D==0 [aw=`wstab_all'], n(200) lpattern(solid) lwidth(medthick)) ///
            (kdensity `psD_all' if e(sample) & D==1 [aw=`wstab_all'], n(200) lpattern(dash)  lwidth(medthick)), ///
            scheme(s2mono) ///
            title("`lab_`S''", size(small)) ///
            legend(off) ///
            xtitle("") ytitle("") ///
            xlabel(0(.2)1, labsize(small)) ///
            ylabel(, labsize(small) angle(0)) ///
            xscale(range(0 1)) yscale(range(0 `ytop_all')) ///
            plotregion(lcolor(none)) graphregion(color(white)) ///
            name(g_wgt_`S'_all, replace)

        local P_all `P_all' g_raw_`S'_all g_wgt_`S'_all

        qui _safeminmax `psD_all' if e(sample) & D==1
        scalar minps_t_all = r(min)
        scalar maxps_t_all = r(max)
        qui _safeminmax `psD_all' if e(sample) & D==0
        scalar minps_c_all = r(min)
        scalar maxps_c_all = r(max)
        scalar LCS_all = max(minps_t_all, minps_c_all)
        scalar UCS_all = min(maxps_t_all, maxps_c_all)

        gen byte   `outcs_all' = (`psD_all' < LCS_all | `psD_all' > UCS_all) if e(sample)
        gen double `w_out_all' = `wstab_all'*`outcs_all'                     if e(sample)
        quietly summarize `wstab_all' if e(sample), meanonly
        scalar sw_all_all = r(sum)
        quietly summarize `w_out_all', meanonly
        scalar share_out_w_all = r(sum)/sw_all_all

        di as txt "Propensity overlap for D (pooled, after selection weighting):"
        di as res "  LCS/UCS = " %6.3f LCS_all " / " %6.3f UCS_all ///
                  "   Treated min/max = " %6.3f minps_t_all " / " %6.3f maxps_t_all ///
                  "   Control min/max = " %6.3f minps_c_all " / " %6.3f maxps_c_all
        di as res "  Weighted mass outside common support (pooled) = " %6.3f share_out_w_all

        gen double `w2_all' = `wstab_all'^2 if e(sample)
        quietly summarize `wstab_all' if e(sample), meanonly
        scalar sw_allW  = r(sum)
        quietly summarize `w2_all' if e(sample), meanonly
        scalar sw2_allW = r(sum)
        scalar ess_allW = (sw_allW^2)/sw2_allW

        quietly summarize `wstab_all' if e(sample) & D==1, meanonly
        scalar sw_t_all  = r(sum)
        quietly summarize `w2_all' if e(sample) & D==1, meanonly
        scalar sw2_t_all = r(sum)
        scalar ess_t_all = (sw_t_all^2)/sw2_t_all

        quietly summarize `wstab_all' if e(sample) & D==0, meanonly
        scalar sw_c_all  = r(sum)
        quietly summarize `w2_all' if e(sample) & D==0, meanonly
        scalar sw2_c_all = r(sum)
        scalar ess_c_all = (sw_c_all^2)/sw2_c_all

        di as txt "Effective Sample Size for D-weights (pooled, stabilized ATT):"
        di as res "  ESS(All) = " %9.1f ess_allW "   ESS(D=1) = " %9.1f ess_t_all "   ESS(D=0) = " %9.1f ess_c_all
    }
}

log close

* Combine panels across samples by sex (optional multi-panel figures)
local P_men_ok
foreach G of local P_men {
    cap graph describe `G'
    if !_rc local P_men_ok `P_men_ok' `G'
}

local P_women_ok
foreach G of local P_women {
    cap graph describe `G'
    if !_rc local P_women_ok `P_women_ok' `G'
}

*** NEW: pooled across samples
local P_all_ok
foreach G of local P_all {
    cap graph describe `G'
    if !_rc local P_all_ok `P_all_ok' `G'
}

local refm : word 1 of `P_men_ok'
local refw : word 1 of `P_women_ok'
local refall : word 1 of `P_all_ok'

cap which grc1leg
local HAS_GRC = (_rc==0)

* MEN
if "`P_men_ok'" != "" {
    if `HAS_GRC' {
        grc1leg `P_men_ok', cols(2) ycommon xcommon imargin(tiny) ///
            legendfrom(`refm') l1("Density") b1("Propensity Score") ///
            title("Men", size(medsmall)) ///
            subtitle("Raw (left) vs Weighted (right)", size(small)) ///
            plotregion(lcolor(none)) graphregion(color(white)) ///
            name(Fig_TREATMENT_psdens_Men, replace)
    }
    else {
        graph combine `P_men_ok', cols(2) ycommon xcommon imargin(tiny) ///
            title("Men", size(medsmall)) ///
            name(Fig_TREATMENT_psdens_Men, replace)
    }
    graph export "Figure_TREATMENT_pscore_densities_Men.pdf", as(pdf) replace
}

* WOMEN
if "`P_women_ok'" != "" {
    if `HAS_GRC' {
        grc1leg `P_women_ok', cols(2) ycommon xcommon imargin(tiny) ///
            legendfrom(`refw') l1("Density") b1("Propensity Score") ///
            title("Women", size(medsmall)) ///
            subtitle("Raw (left) vs Weighted (right)", size(small)) ///
            plotregion(lcolor(none)) graphregion(color(white)) ///
            name(Fig_TREATMENT_psdens_Women, replace)
    }
    else {
        graph combine `P_women_ok', cols(2) ycommon xcommon imargin(tiny) ///
            title("Women", size(medsmall)) ///
            name(Fig_TREATMENT_psdens_Women, replace)
    }
    graph export "Figure_TREATMENT_pscore_densities_Women.pdf", as(pdf) replace
}

*** NEW: POOLED (MEN + WOMEN)
if "`P_all_ok'" != "" {
    if `HAS_GRC' {
        grc1leg `P_all_ok', cols(2) ycommon xcommon imargin(tiny) ///
            legendfrom(`refall') l1("Density") b1("Propensity Score") ///
            title("Pooled sample", size(medsmall)) ///
            subtitle("Raw (left) vs Weighted (right)", size(small)) ///
            plotregion(lcolor(none)) graphregion(color(white)) ///
            name(Fig_TREATMENT_psdens_All, replace)
    }
    else {
        graph combine `P_all_ok', cols(2) ycommon xcommon imargin(tiny) ///
            title("Pooled sample", size(medsmall)) ///
            name(Fig_TREATMENT_psdens_All, replace)
    }
    graph export "Figure_TREATMENT_pscore_densities_All.pdf", as(pdf) replace
}


********************************************************************
* 2. ATTRITION / SELECTION DIAGNOSTICS FOR w_sel_S
*    - Shows that w_sel_S reweights S==1 back to baseline on (xvars, bels, strat)
*    - Also shows w_sel_S and ps_sel_S distributions.
********************************************************************

cap log close
log using "Attrition_Diagnostics.log", text replace

di as txt "==================================================================="
di as txt "ATTRITION / SELECTION DIAGNOSTICS for w_sel_S"
di as txt "Date/time: " c(current_date) "  " c(current_time)
di as txt "===================================================================" _n

local attr_covars `xvars' `bels' `strat'

foreach S of local samples {
    cap confirm variable `S'
    if _rc continue

    cap confirm variable w_sel_`S'
    if _rc {
        di as txt ">> Selection weights w_sel_`S' not found. Skipping `S'."
        continue
    }

    * Sample label
    local slab "`S'"
    capture confirm local lab_`S'

    di as txt _n "-------------------------------------------------------------------"
    di as txt "Selection / attrition diagnostics for `S' — `slab'"
    di as txt "Selection weight: w_sel_`S' = 1/Pr(`S'=1 | D,X)"

    preserve
        keep if inlist(`S',0,1)

        di as txt "Var                    Mean_base   Mean_S1_unw   Mean_S1_w    SMD_unw    SMD_w"

        foreach v of local attr_covars {
            capture confirm variable `v'
            if _rc continue

            * Baseline (full baseline sample)
            quietly summarize `v'
            scalar m_base = r(mean)
            scalar sd_base = r(sd)

            * Within-`S' sample (unweighted)
            quietly summarize `v' if `S'==1
            scalar m_s1_unw = r(mean)

            * Within-`S' sample, reweighted by w_sel_`S'
            quietly summarize `v' [aw = w_sel_`S'] if `S'==1
            scalar m_s1_w   = r(mean)

            scalar diff_unw = m_s1_unw - m_base
            scalar diff_w   = m_s1_w   - m_base

            scalar smd_unw = .
            scalar smd_w   = .
            if sd_base>0 {
                scalar smd_unw = diff_unw / sd_base
                scalar smd_w   = diff_w   / sd_base
            }

            di as res %22s "`v'" "  " ///
               %10.3f m_base "  " ///
               %10.3f m_s1_unw "  " ///
               %10.3f m_s1_w "  " ///
               %10.3f smd_unw "  " ///
               %10.3f smd_w
        }

        * Selection weight distribution (S=1)
        quietly summarize w_sel_`S' if `S'==1, detail
        di as txt "Selection weights w_sel_`S' (S=1) summary:"
        di as res "  N=" %6.0f r(N) ///
                  "  mean=" %8.3f r(mean) ///
                  "  p1=" %8.3f r(p1) ///
                  "  p99=" %8.3f r(p99) ///
                  "  max=" %8.3f r(max)

        histogram w_sel_`S' if `S'==1, ///
            percent bin(40) ///
            title("Selection weights w_sel_`S'' – `slab'") ///
            xtitle("w_sel_`S'") ytitle("Percent") ///
            name(h_wsel_`S', replace)
        graph export "Figure_ATTRITION_weights_`S'.pdf", as(pdf) replace

        * PS distributions for selection (for intuition)
        cap confirm variable ps_sel_`S'
        if !_rc {
            twoway ///
                (kdensity ps_sel_`S' if `S'==0, n(200) lpattern(solid) lwidth(medthick)) ///
                (kdensity ps_sel_`S' if `S'==1, n(200) lpattern(dash)  lwidth(medthick)), ///
                scheme(s2mono) ///
                title("`lab_`S''") ///
                legend(order(1 "S=0" 2 "S=1") cols(2) size(small) region(lcolor(none))) ///
                xtitle("Pr(`S'=1 | D,X)") ytitle("Density") ///
				plotregion(lcolor(none)) graphregion(color(white)) ///
                name(k_ps_sel_`S', replace)
            graph export "Figure_ATTRITION_ps_`S'.pdf", as(pdf) replace
        }

    restore
}

log close

*--------------------------------------------------------------*
* Combine selection PS densities across samples into one figure
*   -> Figure_ATTRITION_pscore_densities_ALL.pdf
*--------------------------------------------------------------*

* Check if grc1leg is installed
cap which grc1leg
local HAS_GRC = (_rc==0)

* Collect the per-sample PS graphs if they exist
local P_attr
foreach S of local samples {
    cap graph describe k_ps_sel_`S'
    if !_rc local P_attr `P_attr' k_ps_sel_`S'
}

* Use the first graph as legend source if using grc1leg
local refA : word 1 of `P_attr'

if "`P_attr'" != "" {
    if `HAS_GRC' {
        grc1leg `P_attr', cols(2) ycommon xcommon imargin(tiny) ///
            legendfrom(`refA') l1("Density") b1("Selection propensity score") ///
            title("Attrition diagnostics", size(medsmall)) ///
            subtitle("Pr(S=1 | D,X); S=0 vs S=1", size(small)) ///
            plotregion(lcolor(none)) graphregion(color(white)) ///
            name(Fig_ATTRITION_psdens_ALL, replace)
    }
    else {
        graph combine `P_attr', cols(2) ycommon xcommon imargin(tiny) ///
            title("Attrition diagnostics", size(medsmall)) ///
            graphregion(color(white)) ///
            name(Fig_ATTRITION_psdens_ALL, replace)
    }

    graph export "Figure_ATTRITION_pscore_densities_ALL.pdf", as(pdf) replace
}
else {
    di as txt "Note: no selection-PS graphs k_ps_sel_* were found."
}


********************************************************************
* NEW: 2b. ATTRITION / SELECTION DIAGNOSTICS BY GENDER
********************************************************************

cap log close
log using "Attrition_Diagnostics.log", text append

di as txt _n "==================================================================="
di as txt "ATTRITION / SELECTION DIAGNOSTICS for w_sel_S — BY GENDER"
di as txt "===================================================================" _n

foreach S of local samples {
    cap confirm variable `S'
    if _rc continue

    cap confirm variable w_sel_`S'
    if _rc {
        di as txt ">> Selection weights w_sel_`S' not found. Skipping `S' (by gender)."
        continue
    }

    local slab "`S'"
    capture confirm local lab_`S'
    if !_rc local slab "`lab_`S''"

    foreach g in 0 1 {
        count if inlist(`S',0,1) & female==`g'
        if r(N)==0 continue

        if `g'==0 local sexlbl "Men"
        else      local sexlbl "Women"

        di as txt _n "-------------------------------------------------------------------"
        di as txt "Selection / attrition diagnostics for `S' — `slab' (`sexlbl')"
        di as txt "Selection weight: w_sel_`S' = 1/Pr(`S'=1 | D,X)"

        preserve
            keep if inlist(`S',0,1) & female==`g'

            di as txt "Var                    Mean_base   Mean_S1_unw   Mean_S1_w    SMD_unw    SMD_w"

            foreach v of local attr_covars {
                capture confirm variable `v'
                if _rc continue

                quietly summarize `v'
                scalar m_base = r(mean)
                scalar sd_base = r(sd)

                quietly summarize `v' if `S'==1
                scalar m_s1_unw = r(mean)

                quietly summarize `v' [aw = w_sel_`S'] if `S'==1
                scalar m_s1_w   = r(mean)

                scalar diff_unw = m_s1_unw - m_base
                scalar diff_w   = m_s1_w   - m_base

                scalar smd_unw = .
                scalar smd_w   = .
                if sd_base>0 {
                    scalar smd_unw = diff_unw / sd_base
                    scalar smd_w   = diff_w   / sd_base
                }

                di as res %22s "`v'" "  " ///
                   %10.3f m_base "  " ///
                   %10.3f m_s1_unw "  " ///
                   %10.3f m_s1_w "  " ///
                   %10.3f smd_unw "  " ///
                   %10.3f smd_w
            }

            quietly summarize w_sel_`S' if `S'==1, detail
            di as txt "Selection weights w_sel_`S' (S=1, `sexlbl') summary:"
            di as res "  N=" %6.0f r(N) ///
                      "  mean=" %8.3f r(mean) ///
                      "  p1=" %8.3f r(p1) ///
                      "  p99=" %8.3f r(p99) ///
                      "  max=" %8.3f r(max)

            histogram w_sel_`S' if `S'==1, ///
                percent bin(40) ///
                title("Selection weights w_sel_`S'' – `slab' (`sexlbl')") ///
                xtitle("w_sel_`S'") ytitle("Percent") ///
                name(h_wsel_`S'_sex`g', replace)
            graph export "Figure_ATTRITION_weights_`S'_sex`g'.pdf", as(pdf) replace

            cap confirm variable ps_sel_`S'
            if !_rc {
                twoway ///
                    (kdensity ps_sel_`S' if `S'==0, n(200) lpattern(solid) lwidth(medthick)) ///
                    (kdensity ps_sel_`S' if `S'==1, n(200) lpattern(dash)  lwidth(medthick)), ///
                    scheme(s2mono) ///
                    title("`slab' (`sexlbl')") ///
                    legend(order(1 "S=0" 2 "S=1") cols(2) size(small) region(lcolor(none))) ///
                    xtitle("Pr(`S'=1 | D,X)") ytitle("Density") ///
                    plotregion(lcolor(none)) graphregion(color(white)) ///
                    name(k_ps_sel_`S'_sex`g', replace)
                graph export "Figure_ATTRITION_ps_`S'_sex`g'.pdf", as(pdf) replace
            }

        restore
    }
}

log close

*--------------------------------------------------------------*
* Combine selection PS densities across samples into figures
*   by gender:
*   -> Figure_ATTRITION_pscore_densities_Men.pdf
*   -> Figure_ATTRITION_pscore_densities_Women.pdf
*--------------------------------------------------------------*

cap which grc1leg
local HAS_GRC = (_rc==0)

local P_attr_men
local P_attr_women

foreach S of local samples {
    cap graph describe k_ps_sel_`S'_sex0
    if !_rc local P_attr_men `P_attr_men' k_ps_sel_`S'_sex0

    cap graph describe k_ps_sel_`S'_sex1
    if !_rc local P_attr_women `P_attr_women' k_ps_sel_`S'_sex1
}

local refAm : word 1 of `P_attr_men'
local refAw : word 1 of `P_attr_women'

if "`P_attr_men'" != "" {
    if `HAS_GRC' {
        grc1leg `P_attr_men', cols(2) ycommon xcommon imargin(tiny) ///
            legendfrom(`refAm') l1("Density") b1("Selection propensity score") ///
            title("Attrition diagnostics — Men", size(medsmall)) ///
            subtitle("Pr(S=1 | D,X); S=0 vs S=1", size(small)) ///
            plotregion(lcolor(none)) graphregion(color(white)) ///
            name(Fig_ATTRITION_psdens_Men, replace)
    }
    else {
        graph combine `P_attr_men', cols(2) ycommon xcommon imargin(tiny) ///
            title("Attrition diagnostics — Men", size(medsmall)) ///
            graphregion(color(white)) ///
            name(Fig_ATTRITION_psdens_Men, replace)
    }
    graph export "Figure_ATTRITION_pscore_densities_Men.pdf", as(pdf) replace
}

if "`P_attr_women'" != "" {
    if `HAS_GRC' {
        grc1leg `P_attr_women', cols(2) ycommon xcommon imargin(tiny) ///
            legendfrom(`refAw') l1("Density") b1("Selection propensity score") ///
            title("Attrition diagnostics — Women", size(medsmall)) ///
            subtitle("Pr(S=1 | D,X); S=0 vs S=1", size(small)) ///
            plotregion(lcolor(none)) graphregion(color(white)) ///
            name(Fig_ATTRITION_psdens_Women, replace)
    }
    else {
        graph combine `P_attr_women', cols(2) ycommon xcommon imargin(tiny) ///
            title("Attrition diagnostics — Women", size(medsmall)) ///
            graphregion(color(white)) ///
            name(Fig_ATTRITION_psdens_Women, replace)
    }
    graph export "Figure_ATTRITION_pscore_densities_Women.pdf", as(pdf) replace
}


********************************************************************
* END
********************************************************************/

